		
*******************************************************************************
*******************************************************************************;

					*Dyadic price regression

*******************************************************************************;
*******************************************************************************	
#d;
use "${analysis}/prices_dyadic_long", clear;

* Outcome is the absolute gap between origin and destination log retail prices. ;
generate abs_log_price_diff=abs(log_retail_price_origin - log_retail_price_dest);

* Natural logs of the two separation measures used as regressors. ;
foreach measure in distance_km duration_hrs {;
	generate log_`measure'=ln(`measure');
};

* Integer identifiers for the string variables - the destination and origin codes
* are the two clustering dimensions passed to cluster2 further down. ;
foreach place in destination origin country {;
	egen `place'_code=group(`place');
};

*******;
*All products;
*******;
preserve;
* Count the distinct locations, whether origin or destination, behind these regressions. ;
* In semicolon-delimited mode a star comment runs until the next semicolon, so every
* comment here is explicitly terminated. Left unterminated, it would swallow the
* keep command that follows and the sample restriction would never run. ;
keep if log_duration_hrs!=. | log_distance_km!=.;
keep country origin destination;
* Keep one row per distinct origin-destination pair. Comparing each field with the
* previous row separately would also discard pairs that merely share an origin or a
* destination with their neighbour, which can lose locations from the count. ;
duplicates drop origin destination, force;
rename origin location1;
rename destination location2;
egen group=group(location1 location2);
* Stack both ends of every pair into one column, then keep each location once. ;
reshape long location, i(group) j(location_number);
sort location;
drop if location==location[_n-1];
count;
* Consumed by the addstat option of the outreg2 calls that follow. ;
local locations=r(N);
restore;

summarize abs_log_price_diff, detail;

* Three specifications on the trimmed sample where the absolute log price gap is below 1 -
* distance only, travel time only, then both together. The first column replaces
* the results file and the later columns are appended to it. ;
local outmode "replace";
foreach rhs in "log_distance_km" "log_duration_hrs" "log_duration_hrs log_distance_km" {;
	xi: cluster2 abs_log_price_diff `rhs' i.month i.year i.country i.product
		if abs_log_price_diff<1, fcluster(destination_code) tcluster(origin_code);

	* Summary statistics use rows where every listed variable is non-missing, that is
	* the regressors of interest plus the outcome, month, year and country. ;
	local n_needed : word count `rhs';
	local n_needed = `n_needed' + 4;
	egen in_regress=rownonmiss(abs_log_price_diff `rhs' month year country) if abs_log_price_diff<1, strok;
	summarize abs_log_price_diff if in_regress==`n_needed';
	local dep_mean=r(mean);
	local dep_sd=r(sd);
	quietly tabulate country if in_regress==`n_needed';
	local n_countries=r(r);
	drop in_regress;

	* The locations macro is expected to have been set by the preceding location count. ;
	quietly outreg2 using "${pool_results}/secondary_dyadic_price_dispersion.out",
		nonote se symb(***,**,*) `outmode' bdec(2) nocons keep(`rhs')
		addstat("mean of dep var", `dep_mean', "sd of dep var", `dep_sd', "# of countries", `n_countries', "locations", `locations', "all products", 1);
	local outmode "append";
};

*******;
*Maize only;
*******;
preserve;
* Count the distinct locations, whether origin or destination, behind the maize regressions. ;
* In semicolon-delimited mode a star comment runs until the next semicolon, so every
* comment here is explicitly terminated. Left unterminated, it would swallow the
* keep command that follows and the count would not be restricted to maize at all. ;
keep if (log_duration_hrs!=. | log_distance_km!=.) & (product=="MAIZE" | product=="MAIZE GRAIN" | product=="MAIZE (WHITE)" | product=="MAIZE (LOCAL)");
keep country origin destination;
* Keep one row per distinct origin-destination pair. Comparing each field with the
* previous row separately would also discard pairs that merely share an origin or a
* destination with their neighbour, which can lose locations from the count. ;
duplicates drop origin destination, force;
rename origin location1;
rename destination location2;
egen group=group(location1 location2);
* Stack both ends of every pair into one column, then keep each location once. ;
reshape long location, i(group) j(location_number);
sort location;
drop if location==location[_n-1];
count;
* Consumed by the addstat option of the outreg2 calls that follow. ;
local locations=r(N);
restore;

* Flag the four product labels that are treated as maize. ;
generate maizeonly=1 if inlist(product, "MAIZE", "MAIZE GRAIN", "MAIZE (WHITE)", "MAIZE (LOCAL)");

* Same three specifications as for all products, restricted to the maize rows.
* Every column is appended to the existing results file. ;
foreach rhs in "log_distance_km" "log_duration_hrs" "log_duration_hrs log_distance_km" {;
	xi: cluster2 abs_log_price_diff `rhs' i.month i.year i.country i.product
		if abs_log_price_diff<1 & maizeonly==1, fcluster(destination_code) tcluster(origin_code);

	* Summary statistics use rows where every listed variable is non-missing, that is
	* the regressors of interest plus the outcome, month, year and country. ;
	local n_needed : word count `rhs';
	local n_needed = `n_needed' + 4;
	egen in_regress=rownonmiss(abs_log_price_diff `rhs' month year country) if abs_log_price_diff<1 & maizeonly==1, strok;
	summarize abs_log_price_diff if in_regress==`n_needed';
	local dep_mean=r(mean);
	local dep_sd=r(sd);
	quietly tabulate country if in_regress==`n_needed';
	local n_countries=r(r);
	drop in_regress;

	* The locations macro is expected to have been set by the preceding location count. ;
	quietly outreg2 using "${pool_results}/secondary_dyadic_price_dispersion.out",
		nonote se symb(***,**,*) append bdec(2) nocons keep(`rhs')
		addstat("mean of dep var", `dep_mean', "sd of dep var", `dep_sd', "# of countries", `n_countries', "locations", `locations', "maize only", 1);
};


*******;
*Fertilizer only;
*******;
preserve;
* Flag fertilizer products by substring. The parentheses only make the existing
* operator precedence explicit, since the and-operator binds tighter than the
* or-operator - the FISH exclusion therefore applies to the CAN match alone.
* NOTE(review) - confirm that this is the intended scope of the exclusion. ;
gen fertilizer=1 if strpos(product, "UREA") | strpos(product, "NPK") | strpos(product, "DAP") | (strpos(product, "CAN") & strpos(product, "FISH")==0);
* Count the distinct locations, whether origin or destination, behind the fertilizer regressions. ;
* In semicolon-delimited mode a star comment runs until the next semicolon, so every
* comment here is explicitly terminated. Left unterminated, it would swallow the
* keep command that follows and the count would not be restricted to fertilizer at all. ;
keep if (log_duration_hrs!=. | log_distance_km!=.) & fertilizer==1;
keep country origin destination;
* Keep one row per distinct origin-destination pair. Comparing each field with the
* previous row separately would also discard pairs that merely share an origin or a
* destination with their neighbour, which can lose locations from the count. ;
duplicates drop origin destination, force;
rename origin location1;
rename destination location2;
egen group=group(location1 location2);
* Stack both ends of every pair into one column, then keep each location once. ;
reshape long location, i(group) j(location_number);
sort location;
drop if location==location[_n-1];
count;
* Consumed by the addstat option of the outreg2 calls that follow. ;
local locations=r(N);
restore;

* Flag fertilizer products by substring. The parentheses only spell out the
* precedence the expression already had, since the and-operator binds tighter than
* the or-operator - the FISH exclusion applies to the CAN match alone.
* NOTE(review) - confirm that this is the intended scope of the exclusion. ;
generate fertilizer=1 if strpos(product, "UREA") | strpos(product, "NPK") | strpos(product, "DAP") | (strpos(product, "CAN") & strpos(product, "FISH")==0);
tabulate product if fertilizer==1;

* Same three specifications as for all products, restricted to the fertilizer rows.
* Every column is appended to the existing results file. ;
foreach rhs in "log_distance_km" "log_duration_hrs" "log_duration_hrs log_distance_km" {;
	xi: cluster2 abs_log_price_diff `rhs' i.month i.year i.country i.product
		if abs_log_price_diff<1 & fertilizer==1, fcluster(destination_code) tcluster(origin_code);

	* Summary statistics use rows where every listed variable is non-missing, that is
	* the regressors of interest plus the outcome, month, year and country. ;
	local n_needed : word count `rhs';
	local n_needed = `n_needed' + 4;
	egen in_regress=rownonmiss(abs_log_price_diff `rhs' month year country) if abs_log_price_diff<1 & fertilizer==1, strok;
	summarize abs_log_price_diff if in_regress==`n_needed';
	local dep_mean=r(mean);
	local dep_sd=r(sd);
	quietly tabulate country if in_regress==`n_needed';
	local n_countries=r(r);
	drop in_regress;

	* The locations macro is expected to have been set by the preceding location count. ;
	quietly outreg2 using "${pool_results}/secondary_dyadic_price_dispersion.out",
		nonote se symb(***,**,*) append bdec(2) nocons keep(`rhs')
		addstat("mean of dep var", `dep_mean', "sd of dep var", `dep_sd', "# of countries", `n_countries', "locations", `locations', "fertilizer only", 1);
};





*******************************************************************************
*******************************************************************************;

					*Tanzania dyadic price regression

*******************************************************************************;
*******************************************************************************	

*** Bring point-to-point distances to the dyad data;
	
	#d;
	* The eight variables that identify an origin-destination pair. ;
	local dyad_keys village_name_origin market_origin ward_origin district_origin village_name_dest market_dest ward_dest district_dest;

	* Distance lookup - keep rows with a distance and a complete key, then remove
	* exact duplicate rows so the file can serve as the using side of an m:1 merge. ;
	use "${analysis}/point_to_point_distances", clear;
	keep if distance_km!=.;
	sort `dyad_keys';
	egen x=group(`dyad_keys');
	keep if x!=.;
	duplicates drop;
	tempfile temp_distance;
	save `temp_distance';

	* First orientation - side 1 of the dyad is the origin and side 2 the destination. ;
	use "${analysis}/Tanzania_MaLTprices_dyad", clear;
	count;
	foreach stub in village_name market ward district {;
		rename `stub'_1 `stub'_origin;
		rename `stub'_2 `stub'_dest;
	};
	merge m:1 `dyad_keys' using `temp_distance';
	keep if _merge==3;
	tempfile part1;
	save `part1';

	* Second orientation - the two sides are swapped, so a dyad also matches when
	* the distance file lists it in the opposite direction. ;
	use "${analysis}/Tanzania_MaLTprices_dyad", clear;
	count;
	foreach stub in village_name market ward district {;
		rename `stub'_2 `stub'_origin;
		rename `stub'_1 `stub'_dest;
	};
	merge m:1 `dyad_keys' using `temp_distance';
	keep if _merge==3;
	append using `part1';

	* Outcome and regressors, built the same way as in the pooled analysis. ;
	generate abs_log_price_diff=abs(log_retail_price_std_w5 - log_retail_price_std_w5_2);
	foreach measure in distance_km duration_hrs {;
		generate log_`measure'=ln(`measure');
	};

	* Clustering identifiers built from the full four-part location key. ;
	egen destination_code=group(village_name_dest market_dest ward_dest district_dest);
	egen origin_code=group(village_name_origin market_origin ward_origin district_origin);

	order dataset product_name month year *origin *dest *_2;
	
	
*** Run regressions;

	#d;
	* All products - distance only, travel time only, then both together, with
	* product fixed effects. The first column replaces the results file and the
	* later columns are appended to it. ;
	capture drop in_regress;
	summarize abs_log_price_diff, detail;
	local outmode "replace";
	foreach rhs in "log_distance_km" "log_duration_hrs" "log_duration_hrs log_distance_km" {;
		xi: cluster2 abs_log_price_diff `rhs' i.product_name
			if abs_log_price_diff<1, fcluster(destination_code) tcluster(origin_code);

		* Summary statistics use rows where every listed variable is non-missing, that
		* is the regressors of interest plus the outcome and the product name. ;
		local n_needed : word count `rhs';
		local n_needed = `n_needed' + 2;
		egen in_regress=rownonmiss(abs_log_price_diff `rhs' product_name) if abs_log_price_diff<1, strok;
		summarize abs_log_price_diff if in_regress==`n_needed';
		local dep_mean=r(mean);
		local dep_sd=r(sd);
		* Number of distinct origin market names among those rows. ;
		distinct market_origin if in_regress==`n_needed';
		local locations=`r(ndistinct)';
		drop in_regress;

		quietly outreg2 using "${pool_results}/TZ_dyadic_price_dispersion.out",
			nonote se symb(***,**,*) `outmode' bdec(2) nocons keep(`rhs')
			addstat("mean of dep var", `dep_mean', "sd of dep var", `dep_sd', "locations", `locations', "all products", 1);
		local outmode "append";
	};
	 
	 
	
	* Maize only - the same three specifications restricted to rows whose product
	* name is exactly MAIZE, with month and year fixed effects added. Every column
	* is appended to the existing results file. ;
	capture drop in_regress;
	summarize abs_log_price_diff, detail;
	foreach rhs in "log_distance_km" "log_duration_hrs" "log_duration_hrs log_distance_km" {;
		xi: cluster2 abs_log_price_diff `rhs' i.product_name i.month i.year
			if product_name=="MAIZE" & abs_log_price_diff<1, fcluster(destination_code) tcluster(origin_code);

		* Summary statistics use rows where every listed variable is non-missing, that
		* is the regressors of interest plus the outcome, product name, month and year. ;
		local n_needed : word count `rhs';
		local n_needed = `n_needed' + 4;
		egen in_regress=rownonmiss(abs_log_price_diff `rhs' product_name month year) if abs_log_price_diff<1 & product_name=="MAIZE", strok;
		summarize abs_log_price_diff if in_regress==`n_needed';
		local dep_mean=r(mean);
		local dep_sd=r(sd);
		* Number of distinct origin market names among those rows. ;
		distinct market_origin if in_regress==`n_needed';
		local locations=`r(ndistinct)';
		drop in_regress;

		quietly outreg2 using "${pool_results}/TZ_dyadic_price_dispersion.out",
			nonote se symb(***,**,*) append bdec(2) nocons keep(`rhs')
			addstat("mean of dep var", `dep_mean', "sd of dep var", `dep_sd', "locations", `locations', "maize only", 1);
	};
	 
	 
	 
	* Fertilizer only - the same three specifications restricted to products whose
	* name contains one of five fertilizer substrings. Every column is appended to
	* the existing results file, with coefficients shown to three decimals. ;
	capture drop in_regress;
	summarize abs_log_price_diff, detail;
	* The capture prefix lets the script continue if the flag cannot be created,
	* for example because the variable already exists. ;
	capture generate fertilizeronly = 1 if (strpos(product_name, "CAN") | strpos(product_name, "DAP") | strpos(product_name, "UREA") |
		strpos(product_name, "YARA") | strpos(product_name, "NPK"));

	foreach rhs in "log_distance_km" "log_duration_hrs" "log_duration_hrs log_distance_km" {;
		xi: cluster2 abs_log_price_diff `rhs' i.product_name
			if fertilizeronly==1 & abs_log_price_diff<1, fcluster(destination_code) tcluster(origin_code);

		* Summary statistics use rows where every listed variable is non-missing, that
		* is the regressors of interest plus the outcome and the product name. ;
		local n_needed : word count `rhs';
		local n_needed = `n_needed' + 2;
		egen in_regress=rownonmiss(abs_log_price_diff `rhs' product_name) if abs_log_price_diff<1 & fertilizeronly==1, strok;
		summarize abs_log_price_diff if in_regress==`n_needed';
		local dep_mean=r(mean);
		local dep_sd=r(sd);
		* Number of distinct origin market names among those rows. ;
		distinct market_origin if in_regress==`n_needed';
		local locations=`r(ndistinct)';
		drop in_regress;

		quietly outreg2 using "${pool_results}/TZ_dyadic_price_dispersion.out",
			nonote se symb(***,**,*) append bdec(3) nocons keep(`rhs')
			addstat("mean of dep var", `dep_mean', "sd of dep var", `dep_sd', "locations", `locations', "fertilizer only", 1);
	};
